home *** CD-ROM | disk | FTP | other *** search
- Subject: v07i003: Vi front-end for remote editing, Part01/04
- Newsgroups: mod.sources
- Approved: mirror!rs
-
- Submitted by: Alan Klietz <ihnp4!dicome!mn-at1!alan>
- Mod.sources: Volume 7, Issue 3
- Archive-name: rvi/Part01
-
-
- #!/bin/sh
- # This is a shell archive. Remove anything before this line,
- # then unpack it by saving it in a file and typing "sh file".
- # Wrapped by mirror!rs on Wed Aug 27 00:04:44 EDT 1986
-
- # Exit status; set to 1 on "wc" errors or if would overwrite.
- STATUS=0
- # Contents: BUGFIX BUGFIX2 Makefile.bsd Makefile.usg NEXT_REL README
- # binsearch.c copy.c copyright Manifest regerror.c regexp.c regexp.h
- # regmagic.h rv_change.c rv_column.c rv_delcol.c rv_dot.c
-
- echo x - BUGFIX
- if test -f BUGFIX ; then
- echo BUGFIX exists, putting output in $$BUGFIX
- OUT=$$BUGFIX
- STATUS=1
- else
- OUT=BUGFIX
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F BUGFIX//'
- XXFix Curses V.2.2 - dereference of null pointer assumes *((char *)0) = '\0'
-
- XX*** _dellines.orig Tue Jan 7 11:58:43 1986
- XX--- _dellines.c Tue Jan 7 11:59:08 1986
- XX***************
- XX*** 30,36
- XX }
- XX if (SP->ml_above + lines > lines_of_memory)
- XX SP->ml_above = lines_of_memory - lines;
- XX! } else if (parm_delete_line && (n>1 || *delete_line==0)) {
- XX tputs(tparm(parm_delete_line, n, SP->phys_y), lines-SP->phys_y, _outch);
- XX }
- XX else if (change_scroll_region && *delete_line==0) {
-
- XX--- 30,36 -----
- XX }
- XX if (SP->ml_above + lines > lines_of_memory)
- XX SP->ml_above = lines_of_memory - lines;
- XX! } else if (parm_delete_line && (n>1 || delete_line==0)) {
- XX tputs(tparm(parm_delete_line, n, SP->phys_y), lines-SP->phys_y, _outch);
- XX }
- XX else if (change_scroll_region && delete_line==0) {
- XX***************
- XX*** 33,39
- XX } else if (parm_delete_line && (n>1 || *delete_line==0)) {
- XX tputs(tparm(parm_delete_line, n, SP->phys_y), lines-SP->phys_y, _outch);
- XX }
- XX! else if (change_scroll_region && *delete_line==0) {
- XX /* vt100: fake delete_line by changing scrolling region */
- XX /* Save since change_scroll_region homes cursor */
- XX tputs(save_cursor, 1, _outch);
-
- XX--- 33,39 -----
- XX } else if (parm_delete_line && (n>1 || delete_line==0)) {
- XX tputs(tparm(parm_delete_line, n, SP->phys_y), lines-SP->phys_y, _outch);
- XX }
- XX! else if (change_scroll_region && delete_line==0) {
- XX /* vt100: fake delete_line by changing scrolling region */
- XX /* Save since change_scroll_region homes cursor */
- XX tputs(save_cursor, 1, _outch);
- @//E*O*F BUGFIX//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - BUGFIX2
- if test -f BUGFIX2 ; then
- echo BUGFIX2 exists, putting output in $$BUGFIX2
- OUT=$$BUGFIX2
- STATUS=1
- else
- OUT=BUGFIX2
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F BUGFIX2//'
- XXFix disable TIMEOUT feature on read-ahead (for 'notimeout' feature
- XXof rvi.) Not really a bug.
-
- XX*** getch.orig Thu Jan 23 15:09:41 1986
- XX--- getch.c Thu Jan 23 15:11:56 1986
- XX***************
- XX*** 113,119
- XX if (SP->kp[i].sends[j] <= 0)
- XX break; /* found */
- XX if (SP->input_queue[j] == -1) {
- XX! SP->input_queue[j] = _fpk(inf);
- XX SP->input_queue[j+1] = -1;
- XX }
- XX if (SP->kp[i].sends[j] != SP->input_queue[j])
-
- XX--- 113,122 -----
- XX if (SP->kp[i].sends[j] <= 0)
- XX break; /* found */
- XX if (SP->input_queue[j] == -1) {
- XX! if (win->_use_keypad == 2)
- XX! read(fileno(inf), &SP->input_queue[j], 1);
- XX! else
- XX! SP->input_queue[j] = _fpk(inf);
- XX SP->input_queue[j+1] = -1;
- XX }
- XX if (SP->kp[i].sends[j] != SP->input_queue[j])
- @//E*O*F BUGFIX2//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - Makefile.bsd
- if test -f Makefile.bsd ; then
- echo Makefile.bsd exists, putting output in $$Makefile.bsd
- OUT=$$Makefile.bsd
- STATUS=1
- else
- OUT=Makefile.bsd
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F Makefile.bsd//'
- XX#
- XX# Use this Makefile for building rvi on BSD systems, or
- XX# on USG systems using the old termcap-style curses.
- XX#
- XX# Targets:
- XX#	all	(default) build rvtest and rvi
- XX#	print	paginate the sources and send them to the printer queue
- XX#	backup	copy the .c/.h files into the existing directory "bak"
- XX#	lint	run lint over every .c file
- XX#	clean	remove object files
- XX#
- XX# NOTE: every recipe line below must start with a TAB character.
- XX#
- XXCFLAGS= -O
- XXLDFLAGS=
- XXLIB= -lcurses -ltermlib
- 
- XXOBJS= rv_init.o rv_main.o rv_redraw.o rv_input.o rv_move.o rv_cmd.o \
- XX	rv_dummy.o rv_print_ln.o rv_scroll.o rv_scroll_bk.o rv_column.o \
- XX	rv_where.o rv_misc.o rv_delete.o rv_delcol.o rv_redraw_ln.o \
- XX	rv_insert.o rv_undo.o rv_openline.o rv_change.o rv_put.o rv_yank.o \
- XX	rv_sync.o rv_xmit.o rv_edit.o rv_fetch.o rv_flash.o rv_dot.o \
- XX	rv_join.o rv_forback.o rv_getline.o rv_search.o \
- XX	binsearch.o rv_linecmd.o copy.o zero.o rv_quit.o \
- XX	regexp.o regerror.o rv_word.o rv_mark.o rv_shell.o rv_fast.o
- 
- XXall: rvtest rvi
- 
- XX# These targets name actions, not files; declaring them phony keeps a
- XX# stray file called e.g. "clean" from silently disabling the target.
- XX# (Makes that do not know .PHONY treat this as an ordinary unused rule.)
- XX.PHONY: all print backup lint clean
- 
- XXrvtest: rvtest.c
- XX	$(CC) rvtest.c -o rvtest
- 
- XXrvi: $(OBJS)
- XX	$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIB) -o rvi
- 
- XXprint:
- XX	pr -f rv.h rv*.c > list
- XX	reverse list > list2
- XX	qpr -q sw list2
- XX	rm -f list list2
- 
- XXbackup:
- XX	cp *.[ch] bak
- 
- XXlint:
- XX	lint *.c $(LIB)
- 
- XXclean:
- XX	rm -f *.o
- @//E*O*F Makefile.bsd//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - Makefile.usg
- if test -f Makefile.usg ; then
- echo Makefile.usg exists, putting output in $$Makefile.usg
- OUT=$$Makefile.usg
- STATUS=1
- else
- OUT=Makefile.usg
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F Makefile.usg//'
- XX#
- XX# Use this Makefile for building rvi on USG systems (with terminfo).
- XX#
- XX# Targets:
- XX#	all	(default) build rvtest and rvi
- XX#	print	paginate the sources and send them to the printer queue
- XX#	backup	copy the .c/.h files into the existing directory "bak"
- XX#	lint	run lint over every .c file
- XX#	clean	remove object files
- XX#
- XX# NOTE: every recipe line below must start with a TAB character.
- XX#
- XXCFLAGS= -O -DKEYPAD -DVIDEO
- XXLDFLAGS=
- XXLIB= -lcurses
- 
- XXOBJS= rv_init.o rv_main.o rv_redraw.o rv_input.o rv_move.o rv_cmd.o \
- XX	rv_dummy.o rv_print_ln.o rv_scroll.o rv_scroll_bk.o rv_column.o \
- XX	rv_where.o rv_misc.o rv_delete.o rv_delcol.o rv_redraw_ln.o \
- XX	rv_insert.o rv_undo.o rv_openline.o rv_change.o rv_put.o rv_yank.o \
- XX	rv_sync.o rv_xmit.o rv_edit.o rv_fetch.o rv_flash.o rv_dot.o \
- XX	rv_join.o rv_forback.o rv_getline.o rv_search.o \
- XX	binsearch.o rv_linecmd.o copy.o zero.o rv_quit.o \
- XX	regexp.o regerror.o rv_word.o rv_mark.o rv_shell.o rv_fast.o
- 
- XXall: rvtest rvi
- 
- XX# These targets name actions, not files; declaring them phony keeps a
- XX# stray file called e.g. "clean" from silently disabling the target.
- XX# (Makes that do not know .PHONY treat this as an ordinary unused rule.)
- XX.PHONY: all print backup lint clean
- 
- XXrvtest: rvtest.c
- XX	$(CC) rvtest.c -o rvtest
- 
- XXrvi: $(OBJS)
- XX	$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIB) -o rvi
- 
- XXprint:
- XX	pr -f rv.h rv*.c > list
- XX	reverse list > list2
- XX	qpr -q sw list2
- XX	rm -f list list2
- 
- XXbackup:
- XX	cp *.[ch] bak
- 
- XXlint:
- XX	lint *.c $(LIB)
- 
- XXclean:
- XX	rm -f *.o
- @//E*O*F Makefile.usg//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - NEXT_REL
- if test -f NEXT_REL ; then
- echo NEXT_REL exists, putting output in $$NEXT_REL
- OUT=$$NEXT_REL
- STATUS=1
- else
- OUT=NEXT_REL
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F NEXT_REL//'
-
- XXI have fixed some bugs in the version of RVI that is posted to mod.sources
- XXin Volume 7. These bugs were found and repaired during the several
- XXweeks it took for rvi to work its way through the network to mod.sources.
- XXI would like to thank Richard Salz for his patience and effort in helping
- XXme find a reliable path to his machine to get the darn thing posted already.
-
- XXThese bugs were pointed out to me by users at the Minnesota Supercomputer
- XXCenter. I am indebted to their persistent efforts, both in finding the
- XXbugs, and in their efforts to coax me into fixing them.
-
- XXA list of bugs appears below. I want to wait a few weeks before sending
- XXthe diffs so that I may incorporate additional bugfixes received from users
- XXimplementing the original distribution.
-
- XXIn the meantime, a version of rvi that incorporates the bugfixes received
- XXso far is available via anonymous ftp. The internet address is:
- XX umn-rei-uc.ARPA
- XXThe sources containing the bugfixes are stored in /staff/rvi/src.
-
- XXA list of bugfixes follows.
-
- XX------------------------------------------------------------------
-
- XXRvi now can fake deleteln and insertln for dumb terminals that do not
- XXhave a change_scroll or insertln/deleteln function.
-
- XXw <file> resets the file_modified flag.
-
- XXA possible coredump caused by deleting the last line in the file was fixed.
-
- XXA spurious ed error message on editing an empty file was removed.
-
- XXEditing a nameless file is now handled correctly for a few bad cases.
-
- XXRemote invocation of rm and echo was changed to avoid conflicts with
- XXshell aliasing.
-
- XXSome error messages were made more verbose.
-
- XXRvi can now interrogate the terminal type remotely (requires putenv()).
-
- XX------------------------------------------------------------------
-
-
- XXPlease send your bug reports to ..ihnp4!dicome!mn-at1!alan.UUCP
- XX or aek@umn-rei-uc.ARPA
-
- XXThank you.
- XX--
- XXAlan Klietz
- XXMinnesota Supercomputer Center
- XX1200 Washington Avenue South
- XXMinneapolis, MN 55415
- XXPh: +1 612 638 0577 ARPA: aek@umn-rei-uc.ARPA
- XX UUCP: ..ihnp4!dicome!mn-at1!alan
-
- XX(*) An affiliate of the University of Minnesota
-
-
- @//E*O*F NEXT_REL//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - README
- if test -f README ; then
- echo README exists, putting output in $$README
- OUT=$$README
- STATUS=1
- else
- OUT=README
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F README//'
-
-
-
-
-
-
-
-
- XX Rvi is a portable distributed screen editor (DSE). It generates ``ed''
- XX commands for execution on a remote machine. It was originally developed
- XX for remote screen editing on CRAY-2 supercomputers.
-
- XX Rvi is most useful
-
- XX o To do screen editing on machines where a normal screen editor
- XX is inappropriate (e.g. supercomputers, IBM mainframes)
-
- XX o In a distributed computing environment
-
- XX o Across slow networks (e.g. satellites, ARPANET)
-
-
- XX Portability was emphasized over efficiency. (For example, it
- XX uses curses rather than doing the CRT manipulations directly)
-
-
- XX Rvi has been tested on a number of machines, including
-
- XX Sun Microsystems SUN 2 and 3
- XX DEC VAX-750/780 (both SV and 4.2)
- XX AT&T UNIX PC
- XX IBM PC AT (Xenix 5 and iAPX286 V)
- XX Silicon Graphics IRIS (V)
- XX Gould CONCEPT 32 (UTX/32)
- XX Apollo (Domain IX)
- XX CRAY-2 (UNICOS) [loopback]
-
- XX
-
- XXHow to make rvi:
-
- XX Unpack the shar files.
-
- XX If you have termcap, type cp Makefile.bsd Makefile
-
- XX If you have terminfo, type cp Makefile.usg Makefile. Also
- XX you should install BUGFIX and BUGFIX2 into your terminfo
- XX library. (In particular, BUGFIX is required so that vt100
- XX terminals perform insert/delete line properly.)
-
- XX Type ``make''
-
- XX Test rvi by running rvtest.
-
-
-
- XXRvi talks through pipe file descriptors to ed. The pipe descriptors should
- XXbe created by your terminal program, e.g. TELNET. You are responsible for
- XXmaking the necessary modifications to your TELNET program to do this.
-
- XXYour TELNET program should catch an escape sequence (such as ^]rvi). It
- XXshould then emit a /bin/ed command to the remote machine, create two pipes
- XXon the local machine, and exec rvi.
-
-
- XXRemember:
-
- XXRvi only emits whole lines terminated by a linefeed. You do not need to
- XXchange the terminal modes; rvi takes care of that. Remember to disable
- XXlocal and remote echoing, and do not attempt nl-cr mapping.
-
-
-
- XXBugs:
-
- XXThe screen is redrawn twice on a full screen update across a window
- XXboundary. This is due to the nature of the window fetching algorithm.
-
- XXSome heuristics are used to determine the version of the ed program.
- XXRvi may get confused by a non-standard version of ed (e.g. a version
- XXof ed that prints prompts.)
-
- XXTermcap's Curses does not handle the "xn" braindamage flag. I had to
- XXhack in support for it.
-
- XXUseful commands such as %, <, and > are not supported because I can't
- XXthink of a way to do them efficiently via ed.
-
- XXMacros and tags are not supported.
-
- XXScrolling is slow under terminfo.
-
-
- XX--
- XXAlan Klietz
- XXMinnesota Supercomputer Center (*)
- XX2520 Broadway Drive
- XXLauderdale, MN 55113 UUCP: ..ihnp4!dicome!mn-at1!alan.UUCP
- XXPh: +1 612 638 0577 ..caip!meccts!dicome!mn-at1!alan.UUCP
- XX ARPA: aek@umn-rei-uc.ARPA
-
- XX(*) Formerly titled Research Equipment Incorporated.
- XX An affiliate of the University of Minnesota
- @//E*O*F README//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - binsearch.c
- if test -f binsearch.c ; then
- echo binsearch.c exists, putting output in $$binsearch.c
- OUT=$$binsearch.c
- STATUS=1
- else
- OUT=binsearch.c
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F binsearch.c//'
- XX/* binsearch - do a binary search of a structure.
- XX 84/04/07. A. E. Klietz.
- XX*/
-
- XXbinsearch(match_string, structarray, size_struct, num_structs)
- XX/* Search an array of structures for the "match_string" and return
- XX	-2 if match_string is not unique
- XX	-1 if match_string was not found (also for an empty match_string
- XX	   or an empty table)
- XX	 i if ith structure matches match_string
- 
- XX   A structure matches when its key equals match_string, or when
- XX   match_string is a left-anchored prefix of its key (see substring()).
- XX   The key is the NUL-terminated string stored at the very start of each
- XX   structure: element i is compared at byte offset i * size_struct.
- XX   The structure array must be alphabetized.  No error message is
- XX   printed.
- XX   The structure must be aligned at least as well as a pointer. */
- 
- XXchar *match_string; /* string to match */
- XXregister char *structarray; /* array of structures to search */
- XXshort size_struct; /* size of each structure element in bytes */
- XXshort num_structs; /* number of structures in array */
- XX{
- XX	/*
- XX	 * Plain ints: the byte offset pos * size_struct can exceed the range
- XX	 * of a short, and strcmp() returns an int.
- XX	 */
- XX	register int pos, diff, lower, upper, indx;
- 
- XX	if (match_string[0] == '\0')	/* if null string */
- XX		return(-1);		/* no match */
- XX	if (num_structs <= 0)		/* empty table: nothing to probe */
- XX		return(-1);
- 
- XX	lower = 0;
- XX	upper = num_structs - 1;
- XX	do {
- XX		pos = (lower + upper) / 2;
- XX		indx = pos * size_struct;
- XX		diff = strcmp(&structarray[indx], match_string);
- XX		if (diff <= 0)	/* if match_string >= &structarray[pos] */
- XX			lower = pos + 1;
- XX		if (diff >= 0)	/* if match_string <= &structarray[pos] */
- XX			upper = pos - 1;
- XX	} while (lower <= upper);
- 
- XX	/* diff still describes the last element probed (pos / indx). */
- XX	if (diff == 0)
- XX		return(pos);
- 
- XX	/*
- XX	 * No exact hit.  A key that merely starts with match_string sorts
- XX	 * after it, so the candidate is either the last element probed or
- XX	 * the one right after it.
- XX	 */
- XX	if (!substring(match_string, &structarray[indx])) {
- XX		++pos;
- XX		indx = pos * size_struct;
- XX		if (pos > num_structs - 1 || !substring(match_string, &structarray[indx]))
- XX			return(-1);
- XX	}
- 
- XX	/* Ambiguous if the following key also starts with match_string. */
- XX	if (pos < num_structs - 1)
- XX		if (substring(match_string, &structarray[(pos + 1) * size_struct]))
- XX			return(-2);	/* not unique error. */
- 
- XX	return(pos);
- XX}
-
-
- XXsubstring(part, full)
- XX/* Prefix test: returns 1 when every character of "part" matches the
- XX   corresponding leading character of "full" (so an empty "part" always
- XX   matches), and 0 at the first mismatch. */
- 
- XXregister char *part, *full;
- XX{
- XX	for (;;) {
- XX		register char c;
- 
- XX		c = *part++;
- XX		if (c == '\0')		/* ran off the end of part: it is a prefix */
- XX			return(1);
- XX		if (c != *full++)	/* mismatch, or full ended first */
- XX			return(0);
- XX	}
- XX}
- @//E*O*F binsearch.c//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - copy.c
- if test -f copy.c ; then
- echo copy.c exists, putting output in $$copy.c
- OUT=$$copy.c
- STATUS=1
- else
- OUT=copy.c
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F copy.c//'
- XX/* copy - copy data structures
- XX 84/12/18. A. E. Klietz.
- XX*/
-
- XX#include "rv.h"
-
- XX#ifdef copy
- XX#undef copy
- XX#endif
-
- XX#ifndef USG
- XX/*
- XX * Copy "len" bytes from "from" to "to", one byte at a time in ascending
- XX * address order.  A zero or negative "len" copies nothing.
- XX */
- XXvoid
- XXcopy(to, from, len)
- XXchar *to, *from;
- XXint len;
- XX{
- XX	while (len > 0) {
- XX		*to = *from;
- XX		++to;
- XX		++from;
- XX		--len;
- XX	}
- XX}
- XX#endif
- @//E*O*F copy.c//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - copyright
- if test -f copyright ; then
- echo copyright exists, putting output in $$copyright
- OUT=$$copyright
- STATUS=1
- else
- OUT=copyright
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F copyright//'
- XX/*
- XX * Rvi - Portable distributed screen editor (DSE).
- XX * 86/07/16. Alan Klietz
- XX * Copyright (c) 1986, Research Equipment Incorporated
- XX * Minnesota Supercomputer Center
- XX *
- XX * Permission is hereby granted to use this software on any computer system
- XX * and to copy this software, including for purposes of redistribution, subject
- XX * to the conditions that
- XX *
- XX * o The full text of this copyright message is retained and prominently
- XX * displayed
- XX *
- XX * o No misrepresentation is made as to the authorship of this software
- XX *
- XX * o The software is not used for resale or direct commercial advantage
- XX *
- XX * By copying, installing, or using this software, the user agrees to abide
- XX * by the above terms and agrees that the software is accepted on an "as is"
- XX * basis, WITHOUT WARRANTY expressed or implied, and relieves Research Equip-
- XX * ment Inc., its affiliates, officers, agents, and employees of any and all
- XX * liability, direct of consequential, resulting from copying, installing
- XX * or using this software.
- XX */
- @//E*O*F copyright//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - Manifest
- if test -f Manifest ; then
- echo Manifest exists, putting output in $$Manifest
- OUT=$$Manifest
- STATUS=1
- else
- OUT=Manifest
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F Manifest//'
- XXBUGFIX
- XXBUGFIX2
- XXMakefile.bsd
- XXMakefile.usg
- XXNEXT_REL
- XXREADME
- XXbinsearch.c
- XXcopy.c
- XXcopyright
- XXManifest
- XXregerror.c
- XXregexp.c
- XXregexp.h
- XXregmagic.h
- XXrv.h
- XXrv_change.c
- XXrv_cmd.c
- XXrv_column.c
- XXrv_delcol.c
- XXrv_delete.c
- XXrv_dot.c
- XXrv_dummy.c
- XXrv_edit.c
- XXrv_fast.c
- XXrv_fetch.c
- XXrv_flash.c
- XXrv_forback.c
- XXrv_getline.c
- XXrv_init.c
- XXrv_input.c
- XXrv_insert.c
- XXrv_join.c
- XXrv_linecmd.c
- XXrv_main.c
- XXrv_mark.c
- XXrv_misc.c
- XXrv_move.c
- XXrv_openline.c
- XXrv_print_ln.c
- XXrv_put.c
- XXrv_quit.c
- XXrv_redraw.c
- XXrv_redraw_ln.c
- XXrv_scroll.c
- XXrv_scroll_bk.c
- XXrv_search.c
- XXrv_shell.c
- XXrv_sync.c
- XXrv_undo.c
- XXrv_where.c
- XXrv_word.c
- XXrv_xmit.c
- XXrv_yank.c
- XXrvi.1
- XXrvtest.c
- XXtodo
- XXzero.c
- @//E*O*F Manifest//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - regerror.c
- if test -f regerror.c ; then
- echo regerror.c exists, putting output in $$regerror.c
- OUT=$$regerror.c
- STATUS=1
- else
- OUT=regerror.c
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F regerror.c//'
- XX#include "rv.h"
-
- XXvoid
- XXregerror(s)
- XXchar *s;
- XX{
- XX botprint(TRUE, "%s", s);
- XX}
- @//E*O*F regerror.c//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - regexp.c
- if test -f regexp.c ; then
- echo regexp.c exists, putting output in $$regexp.c
- OUT=$$regexp.c
- STATUS=1
- else
- OUT=regexp.c
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F regexp.c//'
- XX#define RVI /* Modified version for remote vi */
- XX#include <stdio.h>
-
- XX#ifdef RVI
- XX# if !defined(L_ctermid) || defined(sun) /* If BSD system */
- XX# define strchr index
- XX# define STRCSPN
- XX# endif
- XX#endif
- XX/*
- XX * regcomp and regexec -- regsub and regerror are elsewhere
- XX *
- XX * Copyright (c) 1986 by University of Toronto.
- XX * Written by Henry Spencer. Not derived from licensed software.
- XX *
- XX * Permission is granted to anyone to use this software for any
- XX * purpose on any computer system, and to redistribute it freely,
- XX * subject to the following restrictions:
- XX *
- XX * 1. The author is not responsible for the consequences of use of
- XX * this software, no matter how awful, even if they arise
- XX * from defects in it.
- XX *
- XX * 2. The origin of this software must not be misrepresented, either
- XX * by explicit claim or by omission.
- XX *
- XX * 3. Altered versions must be plainly marked as such, and must not
- XX * be misrepresented as being the original software.
- XX *
- XX * Beware that some of this code is subtly aware of the way operator
- XX * precedence is structured in regular expressions. Serious changes in
- XX * regular-expression syntax might require a total rethink.
- XX */
- XX#include <stdio.h>
- XX#include "regexp.h"
- XX#include "regmagic.h"
-
- XX/*
- XX * The "internal use only" fields in regexp.h are present to pass info from
- XX * compile to execute that permits the execute phase to run lots faster on
- XX * simple cases. They are:
- XX *
- XX * regstart char that must begin a match; '\0' if none obvious
- XX * reganch is the match anchored (at beginning-of-line only)?
- XX * regmust string (pointer into program) that match must include, or NULL
- XX * regmlen length of regmust string
- XX *
- XX * Regstart and reganch permit very fast decisions on suitable starting points
- XX * for a match, cutting down the work a lot. Regmust permits fast rejection
- XX * of lines that cannot possibly match. The regmust tests are costly enough
- XX * that regcomp() supplies a regmust only if the r.e. contains something
- XX * potentially expensive (at present, the only such thing detected is * or +
- XX * at the start of the r.e., which can involve a lot of backup). Regmlen is
- XX * supplied because the test in regexec() needs it and regcomp() is computing
- XX * it anyway.
- XX */
-
- XX/*
- XX * Structure for regexp "program". This is essentially a linear encoding
- XX * of a nondeterministic finite-state machine (aka syntax charts or
- XX * "railroad normal form" in parsing technology). Each node is an opcode
- XX * plus a "next" pointer, possibly plus an operand. "Next" pointers of
- XX * all nodes except BRANCH implement concatenation; a "next" pointer with
- XX * a BRANCH on both ends of it is connecting two alternatives. (Here we
- XX * have one of the subtle syntax dependencies: an individual BRANCH (as
- XX * opposed to a collection of them) is never concatenated with anything
- XX * because of operator precedence.) The operand of some types of node is
- XX * a literal string; for others, it is a node leading into a sub-FSM. In
- XX * particular, the operand of a BRANCH node is the first node of the branch.
- XX * (NB this is *not* a tree structure: the tail of the branch connects
- XX * to the thing following the set of BRANCHes.) The opcodes are:
- XX */
-
- XX/* definition number opnd? meaning */
- XX#define END 0 /* no End of program. */
- XX#define BOL 1 /* no Match "" at beginning of line. */
- XX#define EOL 2 /* no Match "" at end of line. */
- XX#define ANY 3 /* no Match any one character. */
- XX#define ANYOF 4 /* str Match any character in this string. */
- XX#define ANYBUT 5 /* str Match any character not in this string. */
- XX#define BRANCH 6 /* node Match this alternative, or the next... */
- XX#define BACK 7 /* no Match "", "next" ptr points backward. */
- XX#define EXACTLY 8 /* str Match this string. */
- XX#define NOTHING 9 /* no Match empty string. */
- XX#define STAR 10 /* node Match this (simple) thing 0 or more times. */
- XX#define PLUS 11 /* node Match this (simple) thing 1 or more times. */
- XX#define OPEN 20 /* no Mark this point in input as start of #n. */
- XX /* OPEN+1 is number 1, etc. */
- XX#define CLOSE 30 /* no Analogous to OPEN. */
-
- XX/*
- XX * Opcode notes:
- XX *
- XX * BRANCH The set of branches constituting a single choice are hooked
- XX * together with their "next" pointers, since precedence prevents
- XX * anything being concatenated to any individual branch. The
- XX * "next" pointer of the last BRANCH in a choice points to the
- XX * thing following the whole choice. This is also where the
- XX * final "next" pointer of each individual branch points; each
- XX * branch starts with the operand node of a BRANCH node.
- XX *
- XX * BACK Normal "next" pointers all implicitly point forward; BACK
- XX * exists to make loop structures possible.
- XX *
- XX * STAR,PLUS '?', and complex '*' and PLUSSIGN, are implemented as circular
- XX * BRANCH structures using BACK. Simple cases (one character
- XX * per match) are implemented with STAR and PLUS for speed
- XX * and to minimize recursive plunges.
- XX *
- XX * OPEN,CLOSE ...are numbered at compile time.
- XX */
-
- XX/*
- XX * A node is one char of opcode followed by two chars of "next" pointer.
- XX * "Next" pointers are stored as two 8-bit pieces, high order first. The
- XX * value is a positive offset from the opcode of the node containing it.
- XX * An operand, if any, simply follows the node. (Note that much of the
- XX * code generation knows about this implicit relationship.)
- XX *
- XX * Using two bytes for the "next" pointer is vast overkill for most things,
- XX * but allows patterns to get big without disasters.
- XX */
- XX/* Opcode byte of the node at p. */
- XX#define	OP(p)	(*(p))
- XX/*
- XX * 16-bit "next" offset of the node at p: high byte at p[1], low byte at
- XX * p[2].  Each byte is masked separately; without the inner parentheses
- XX * "+" binds tighter than "&" and the whole sum is cut down to 8 bits.
- XX */
- XX#define	NEXT(p)	(((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
- XX/* First operand byte, immediately after the 3-byte node header. */
- XX#define	OPERAND(p)	((p) + 3)
-
- XX/*
- XX * See regmagic.h for one further detail of program structure.
- XX */
-
-
- XX/*
- XX * Utility definitions.
- XX */
- XX#ifndef CHARBITS
- XX#define UCHARAT(p) ((int)*(unsigned char *)(p))
- XX#else
- XX#define UCHARAT(p) ((int)*(p)&CHARBITS)
- XX#endif
-
- XX#define FAIL(m) { regerror(m); return(NULL); }
- XX#define ISMULT(c) ((c) == '*' || (c) == PLUSSIGN || (c) == '?')
-
- XX#ifndef RVI /* Original version */
- XX#define META "^$.[()|+*\\"
- XX#define LPAREN '('
- XX#define RPAREN ')'
- XX#define BAR '|'
- XX#define PLUSSIGN '+'
- XX#define QUES '?'
- XX#else /* Modified version for rvi */
- XX#define META "^$.[?*\\"
- XX#define LPAREN 255
- XX#define RPAREN 254
- XX#define BAR 253
- XX#define PLUSSIGN 252
- XX#define QUES 251
- XX#endif
-
- XX/*
- XX * Flags to be passed up and down.
- XX */
- XX#define HASWIDTH 01 /* Known never to match null string. */
- XX#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
- XX#define SPSTART 04 /* Starts with * or +. */
- XX#define WORST 0 /* Worst case. */
-
- XX/*
- XX * Global work variables for regcomp().
- XX */
- XXstatic char *regparse; /* Input-scan pointer. */
- XXstatic int regnpar; /* () count. */
- XXstatic char regdummy;
- XXstatic char *regcode; /* Code-emit pointer; &regdummy = don't. */
- XXstatic long regsize; /* Code size. */
-
- XX/*
- XX * Forward declarations for regcomp()'s friends.
- XX */
- XX#ifndef STATIC
- XX#define STATIC static
- XX#endif
- XXSTATIC char *reg();
- XXSTATIC char *regbranch();
- XXSTATIC char *regpiece();
- XXSTATIC char *regatom();
- XXSTATIC char *regnode();
- XXSTATIC char *regnext();
- XXSTATIC void regc();
- XXSTATIC void reginsert();
- XXSTATIC void regtail();
- XXSTATIC void regoptail();
- XX#ifdef STRCSPN
- XXSTATIC int strcspn();
- XX#endif
-
- XX/*
- XX - regcomp - compile a regular expression into internal code
- XX *
- XX * We can't allocate space until we know how big the compiled form will be,
- XX * but we can't compile it (and thus know how big it is) until we've got a
- XX * place to put the code. So we cheat: we compile it twice, once with code
- XX * generation turned off and size counting turned on, and once "for real".
- XX * This also means that we don't allocate space until we are sure that the
- XX * thing really will compile successfully, and we never have to move the
- XX * code and thus invalidate pointers into it. (Note that it has to be in
- XX * one piece because free() must be able to free it all.)
- XX *
- XX * Beware that the optimization-preparation code in here knows about some
- XX * of the structure of the compiled regexp.
- XX */
- XX/*
- XX * Returns a newly malloc()ed regexp holding the compiled form of "exp",
- XX * or NULL on failure.  Failures detected here (NULL argument, oversized
- XX * program, out of memory) are reported through regerror() by FAIL.
- XX */
- XXregexp *
- XXregcomp(exp)
- XXchar *exp;
- XX{
- XX	register regexp *r;
- XX	register char *scan;
- XX	register char *longest;
- XX	register int len;
- XX	int flags;
- XX	extern char *malloc();
- XX	extern void free();
- 
- XX	if (exp == NULL)
- XX		FAIL("NULL argument");
- 
- XX	/* First pass: determine size, legality. */
- XX	regparse = exp;
- XX	regnpar = 1;
- XX	regsize = 0L;
- XX	regcode = &regdummy;	/* Emit nothing; just count. */
- XX	regc(MAGIC);
- XX	if (reg(0, &flags) == NULL)
- XX		return(NULL);
- 
- XX	/* Small enough for pointer-storage convention? */
- XX	if (regsize >= 32767L)	/* Probably could be 65535L. */
- XX		FAIL("regexp too big");
- 
- XX	/* Allocate space. */
- XX	r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize);
- XX	if (r == NULL)
- XX		FAIL("out of space");
- 
- XX	/* Second pass: emit code. */
- XX	regparse = exp;
- XX	regnpar = 1;
- XX	regcode = r->program;
- XX	regc(MAGIC);
- XX	if (reg(0, &flags) == NULL) {
- XX		free((char *)r);	/* Don't leak the program buffer. */
- XX		return(NULL);
- XX	}
- 
- XX	/* Dig out information for optimizations. */
- XX	r->regstart = '\0';	/* Worst-case defaults. */
- XX	r->reganch = 0;
- XX	r->regmust = NULL;
- XX	r->regmlen = 0;
- XX	scan = r->program+1;	/* First BRANCH. */
- XX	if (OP(regnext(scan)) == END) {	/* Only one top-level choice. */
- XX		scan = OPERAND(scan);
- 
- XX		/* Starting-point info. */
- XX		if (OP(scan) == EXACTLY)
- XX			r->regstart = *OPERAND(scan);
- XX		else if (OP(scan) == BOL)
- XX			r->reganch++;
- 
- XX		/*
- XX		 * If there's something expensive in the r.e., find the
- XX		 * longest literal string that must appear and make it the
- XX		 * regmust.  Resolve ties in favor of later strings, since
- XX		 * the regstart check works with the beginning of the r.e.
- XX		 * and avoiding duplication strengthens checking.  Not a
- XX		 * strong reason, but sufficient in the absence of others.
- XX		 */
- XX		if (flags&SPSTART) {
- XX			longest = NULL;
- XX			len = 0;
- XX			for (; scan != NULL; scan = regnext(scan))
- XX				if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
- XX					longest = OPERAND(scan);
- XX					len = strlen(OPERAND(scan));
- XX				}
- XX			r->regmust = longest;
- XX			r->regmlen = len;
- XX		}
- XX	}
- 
- XX	return(r);
- XX}
-
- XX/*
- XX - reg - regular expression, i.e. main body or parenthesized thing
- XX *
- XX * Caller must absorb opening parenthesis.
- XX *
- XX * Combining parenthesis handling with the base level of regular expression
- XX * is a trifle forced, but the need to tie the tails of the branches to what
- XX * follows makes it hard to avoid.
- XX */
- XX/*
- XX * paren: nonzero when compiling a parenthesized group.
- XX * flagp: out-parameter; starts as HASWIDTH, which is cleared if any branch
- XX *        lacks it, and gains SPSTART if any branch reports it.
- XX * Returns the first node of the construct (the OPEN node when paren is
- XX * set, otherwise the first BRANCH), or NULL on failure.
- XX */
- XXstatic char *
- XXreg(paren, flagp)
- XXint paren; /* Parenthesized? */
- XXint *flagp;
- XX{
- XX register char *ret;
- XX register char *br;
- XX register char *ender;
- XX register int parno; /* Set and read only when paren is nonzero. */
- XX int flags;
- 
- XX *flagp = HASWIDTH; /* Tentatively. */
- 
- XX /* Make an OPEN node, if parenthesized. */
- XX if (paren) {
- XX /* NSUBEXP is not defined in this file; presumably from regexp.h. */
- XX if (regnpar >= NSUBEXP)
- XX FAIL("too many ()");
- XX parno = regnpar;
- XX regnpar++;
- XX ret = regnode(OPEN+parno);
- XX } else
- XX ret = NULL;
- 
- XX /* Pick up the branches, linking them together. */
- XX br = regbranch(&flags);
- XX if (br == NULL)
- XX return(NULL);
- XX if (ret != NULL)
- XX regtail(ret, br); /* OPEN -> first. */
- XX else
- XX ret = br;
- XX if (!(flags&HASWIDTH))
- XX *flagp &= ~HASWIDTH;
- XX *flagp |= flags&SPSTART;
- XX /* One more branch per BAR; each is appended to the chain from ret. */
- XX while (*regparse == BAR) {
- XX regparse++;
- XX br = regbranch(&flags);
- XX if (br == NULL)
- XX return(NULL);
- XX regtail(ret, br); /* BRANCH -> BRANCH. */
- XX if (!(flags&HASWIDTH))
- XX *flagp &= ~HASWIDTH;
- XX *flagp |= flags&SPSTART;
- XX }
- 
- XX /* Make a closing node, and hook it on the end. */
- XX ender = regnode((paren) ? CLOSE+parno : END);
- XX regtail(ret, ender);
- 
- XX /* Hook the tails of the branches to the closing node. */
- XX for (br = ret; br != NULL; br = regnext(br))
- XX regoptail(br, ender);
- 
- XX /* Check for proper termination. */
- XX /* FAIL expands to a braced block, hence the explicit braces around it. */
- XX if (paren && *regparse++ != RPAREN) {
- XX FAIL("unmatched ()");
- XX } else if (!paren && *regparse != '\0') {
- XX if (*regparse == RPAREN) {
- XX FAIL("unmatched ()");
- XX } else
- XX FAIL("junk on end"); /* "Can't happen". */
- XX /* NOTREACHED */
- XX }
- 
- XX return(ret);
- XX}
-
- XX/*
- XX - regbranch - one alternative of an | operator
- XX *
- XX * Implements the concatenation operator.
- XX */
- XX/*
- XX * flagp: out-parameter; starts as WORST, gains HASWIDTH if any piece has
- XX *        it, and gains SPSTART only from the first piece.
- XX * Returns the BRANCH node that heads this alternative, or NULL if
- XX * regpiece() fails.
- XX */
- XXstatic char *
- XXregbranch(flagp)
- XXint *flagp;
- XX{
- XX register char *ret;
- XX register char *chain; /* Previous piece, NULL before the first one. */
- XX register char *latest;
- XX int flags;
- 
- XX *flagp = WORST; /* Tentatively. */
- 
- XX ret = regnode(BRANCH);
- XX chain = NULL;
- XX /* Consume pieces until end of pattern, a BAR, or an RPAREN. */
- XX while (*regparse != '\0' && *regparse != BAR && *regparse != RPAREN) {
- XX latest = regpiece(&flags);
- XX if (latest == NULL)
- XX return(NULL);
- XX *flagp |= flags&HASWIDTH;
- XX if (chain == NULL) /* First piece. */
- XX *flagp |= flags&SPSTART;
- XX else
- XX regtail(chain, latest);
- XX chain = latest;
- XX }
- XX /* An empty alternative still needs a body, so emit a NOTHING node. */
- XX if (chain == NULL) /* Loop ran zero times. */
- XX (void) regnode(NOTHING);
- 
- XX return(ret);
- XX}
-
- XX/*
- XX - regpiece - something followed by possible [*+?]
- XX *
- XX * Note that the branching code sequences used for ? and the general cases
- XX * of * and + are somewhat optimized: they use the same NOTHING node as
- XX * both the endmarker for their branch list and the body of the last branch.
- XX * It might seem that this node could be dispensed with entirely, but the
- XX * endmarker role is not redundant.
- XX */
- XX/*
- XX * flagp: out-parameter; receives the atom's flags unchanged when no
- XX *        repetition operator follows, otherwise WORST|HASWIDTH for
- XX *        PLUSSIGN and WORST|SPSTART for the other operators.
- XX * Returns the first node of the piece, or NULL on failure.
- XX */
- XXstatic char *
- XXregpiece(flagp)
- XXint *flagp;
- XX{
- XX register char *ret;
- XX register char op;
- XX register char *next;
- XX int flags;
- 
- XX ret = regatom(&flags);
- XX if (ret == NULL)
- XX return(NULL);
- 
- XX /* No repetition operator: the piece is just the atom. */
- XX op = *regparse;
- XX if (!ISMULT(op)) {
- XX *flagp = flags;
- XX return(ret);
- XX }
- 
- XX /* Only '?' may be applied to an operand that can match nothing. */
- XX if (!(flags&HASWIDTH) && op != '?')
- XX FAIL("*+ operand could be empty");
- XX *flagp = (op != PLUSSIGN) ? (WORST|SPSTART) : (WORST|HASWIDTH);
- 
- XX if (op == '*' && (flags&SIMPLE))
- XX reginsert(STAR, ret);
- XX else if (op == '*') {
- XX /* Emit x* as (x&|), where & means "self". */
- XX reginsert(BRANCH, ret); /* Either x */
- XX regoptail(ret, regnode(BACK)); /* and loop */
- XX regoptail(ret, ret); /* back */
- XX regtail(ret, regnode(BRANCH)); /* or */
- XX regtail(ret, regnode(NOTHING)); /* null. */
- XX } else if (op == PLUSSIGN && (flags&SIMPLE))
- XX reginsert(PLUS, ret);
- XX else if (op == PLUSSIGN) {
- XX /* Emit x+ as x(&|), where & means "self". */
- XX next = regnode(BRANCH); /* Either */
- XX regtail(ret, next);
- XX regtail(regnode(BACK), ret); /* loop back */
- XX regtail(next, regnode(BRANCH)); /* or */
- XX regtail(ret, regnode(NOTHING)); /* null. */
- XX } else if (op == '?') {
- XX /* Emit x? as (x|) */
- XX reginsert(BRANCH, ret); /* Either x */
- XX regtail(ret, regnode(BRANCH)); /* or */
- XX next = regnode(NOTHING); /* null. */
- XX regtail(ret, next);
- XX regoptail(ret, next);
- XX }
- XX /* Step past the operator; a second operator right after it is an error. */
- XX regparse++;
- XX if (ISMULT(*regparse))
- XX FAIL("nested *?+");
- 
- XX return(ret);
- XX}
-
- XX/*
- XX - regatom - the lowest level
- XX *
- XX * Consumes one atom at regparse and emits its node: an anchor, '.', a
- XX * bracket class, a parenthesized subexpression (via reg()), a
- XX * backslashed character, or a run of ordinary characters.  Returns the
- XX * emitted node and reports HASWIDTH/SIMPLE/SPSTART through *flagp.
- XX *
- XX * Optimization: gobbles an entire sequence of ordinary characters so that
- XX * it can turn them into a single node, which is smaller to store and
- XX * faster to run. Backslashed characters are exceptions, each becoming a
- XX * separate node; the code is simpler that way and it's not worth fixing.
- XX */
- XXstatic char *
- XXregatom(flagp)
- XXint *flagp;
- XX{
- XX register char *ret;
- XX int flags;
-
- XX *flagp = WORST; /* Tentatively. */
-
- XX switch (*regparse++) {
- XX case '^':
- XX ret = regnode(BOL);
- XX break;
- XX case '$':
- XX ret = regnode(EOL);
- XX break;
- XX case '.':
- XX ret = regnode(ANY);
- XX *flagp |= HASWIDTH|SIMPLE;
- XX break;
- XX case '[': {
- XX register int class;
- XX register int classend;
-
- XX if (*regparse == '^') { /* Complement of range. */
- XX ret = regnode(ANYBUT);
- XX regparse++;
- XX } else
- XX ret = regnode(ANYOF);
- XX /* A leading ']' or '-' is taken literally. */
- XX if (*regparse == ']' || *regparse == '-')
- XX regc(*regparse++);
- XX while (*regparse != '\0' && *regparse != ']') {
- XX if (*regparse == '-') {
- XX regparse++;
- XX /* A '-' just before the closing ']' (or the end) is literal. */
- XX if (*regparse == ']' || *regparse == '\0')
- XX regc('-');
- XX else {
- XX /* Range: the low end was already emitted, so start one past it. */
- XX class = UCHARAT(regparse-2)+1;
- XX classend = UCHARAT(regparse);
- XX if (class > classend+1)
- XX FAIL("invalid [] range");
- XX for (; class <= classend; class++)
- XX regc(class);
- XX regparse++;
- XX }
- XX } else
- XX regc(*regparse++);
- XX }
- XX /* The member list is stored as a NUL-terminated string operand. */
- XX regc('\0');
- XX if (*regparse != ']')
- XX FAIL("unmatched []");
- XX regparse++;
- XX *flagp |= HASWIDTH|SIMPLE;
- XX }
- XX break;
- XX case LPAREN:
- XX ret = reg(1, &flags);
- XX if (ret == NULL)
- XX return(NULL);
- XX *flagp |= flags&(HASWIDTH|SPSTART);
- XX break;
- XX case '\0':
- XX case BAR:
- XX case RPAREN:
- XX FAIL("internal urp"); /* Supposed to be caught earlier. */
- XX break;
- XX case '?':
- XX case PLUSSIGN:
- XX case '*':
- XX FAIL("?+* follows nothing");
- XX break;
- XX case '\\':
- XX if (*regparse == '\0')
- XX FAIL("trailing \\");
- XX /* The escaped character becomes its own one-character EXACTLY node. */
- XX ret = regnode(EXACTLY);
- XX regc(*regparse++);
- XX regc('\0');
- XX *flagp |= HASWIDTH|SIMPLE;
- XX break;
- XX default: {
- XX register int len;
- XX register char ender;
-
- XX /* Un-consume the character, then measure the run up to the next META character. */
- XX regparse--;
- XX len = strcspn(regparse, META);
- XX if (len <= 0)
- XX FAIL("internal disaster");
- XX ender = *(regparse+len);
- XX if (len > 1 && ISMULT(ender))
- XX len--; /* Back off clear of ?+* operand. */
- XX *flagp |= HASWIDTH;
- XX if (len == 1)
- XX *flagp |= SIMPLE;
- XX ret = regnode(EXACTLY);
- XX while (len > 0) {
- XX regc(*regparse++);
- XX len--;
- XX }
- XX regc('\0');
- XX }
- XX break;
- XX }
-
- XX return(ret);
- XX}
-
- XX/*
- XX - regnode - emit a node
- XX *
- XX * Writes a three-byte node at regcode -- the opcode followed by a zeroed
- XX * two-byte "next" field -- and advances regcode past it.  While regcode
- XX * points at regdummy nothing is written; the three bytes are only added
- XX * to regsize.  Returns the node's location (regcode's value on entry).
- XX */
- XXstatic char * /* Location. */
- XXregnode(op)
- XXchar op;
- XX{
- XX register char *ret;
- XX register char *ptr;
-
- XX ret = regcode;
- XX /* Counting only: account for the node without storing it. */
- XX if (ret == &regdummy) {
- XX regsize += 3;
- XX return(ret);
- XX }
-
- XX ptr = ret;
- XX *ptr++ = op;
- XX *ptr++ = '\0'; /* Null "next" pointer. */
- XX *ptr++ = '\0';
- XX regcode = ptr;
-
- XX return(ret);
- XX}
-
- XX/*
- XX - regc - emit (if appropriate) a byte of code
- XX *
- XX * Stores b at regcode and advances regcode, unless regcode points at
- XX * regdummy, in which case the byte is only counted in regsize.
- XX */
- XXstatic void
- XXregc(b)
- XXchar b;
- XX{
- XX if (regcode != &regdummy)
- XX *regcode++ = b;
- XX else
- XX regsize++;
- XX}
-
- XX/*
- XX - reginsert - insert an operator in front of already-emitted operand
- XX *
- XX * Means relocating the operand.
- XX *
- XX * Everything from opnd up to regcode is shifted three bytes towards
- XX * higher addresses, and a node with opcode op and a zeroed "next" field
- XX * is written where the operand used to start.  While regcode points at
- XX * regdummy nothing is moved; the three bytes are only added to regsize.
- XX */
- XXstatic void
- XXreginsert(op, opnd)
- XXchar op;
- XXchar *opnd;
- XX{
- XX register char *src;
- XX register char *dst;
- XX register char *place;
-
- XX if (regcode == &regdummy) {
- XX regsize += 3;
- XX return;
- XX }
-
- XX /* Copy from the top down so the overlapping move does not clobber itself. */
- XX src = regcode;
- XX regcode += 3;
- XX dst = regcode;
- XX while (src > opnd)
- XX *--dst = *--src;
-
- XX place = opnd; /* Op node, where operand used to be. */
- XX *place++ = op;
- XX *place++ = '\0';
- XX *place++ = '\0';
- XX}
-
- XX/*
- XX - regtail - set the next-pointer at the end of a node chain
- XX *
- XX * Follows the chain starting at p to its last node and stores there the
- XX * distance to val as a two-byte offset, high byte first.  For a BACK
- XX * node the distance is measured backwards (scan - val).  Does nothing
- XX * when p is &regdummy, i.e. when no code is actually being emitted.
- XX */
- XXstatic void
- XXregtail(p, val)
- XXchar *p;
- XXchar *val;
- XX{
- XX register char *scan;
- XX register char *temp;
- XX register int offset;
-
- XX if (p == &regdummy)
- XX return;
-
- XX /* Find last node. */
- XX scan = p;
- XX for (;;) {
- XX temp = regnext(scan);
- XX if (temp == NULL)
- XX break;
- XX scan = temp;
- XX }
-
- XX if (OP(scan) == BACK)
- XX offset = scan - val;
- XX else
- XX offset = val - scan;
- XX *(scan+1) = (offset>>8)&0377;
- XX *(scan+2) = offset&0377;
- XX}
-
- XX/*
- XX - regoptail - regtail on operand of first argument; nop if operandless
- XX *
- XX * Only BRANCH nodes are acted upon; a NULL p, p == &regdummy (nothing
- XX * is being emitted) or any other opcode makes this a no-op.
- XX */
- XXstatic void
- XXregoptail(p, val)
- XXchar *p;
- XXchar *val;
- XX{
- XX /* "Operandless" and "op != BRANCH" are synonymous in practice. */
- XX if (p == NULL || p == &regdummy || OP(p) != BRANCH)
- XX return;
- XX regtail(OPERAND(p), val);
- XX}
-
- XX/*
- XX * regexec and friends
- XX */
-
- XX/*
- XX * Global work variables for regexec().
- XX *
- XX * regtry() loads these before every match attempt; regmatch() and
- XX * regrepeat() advance reginput as they consume the input string.
- XX */
- XXstatic char *reginput; /* String-input pointer. */
- XXstatic char *regbol; /* Beginning of input, for ^ check. */
- XXstatic char **regstartp; /* Pointer to startp array. */
- XXstatic char **regendp; /* Ditto for endp. */
-
- XX/*
- XX * Forwards.
- XX */
- XXSTATIC int regtry();
- XXSTATIC int regmatch();
- XXSTATIC int regrepeat();
-
- XX#ifdef DEBUG
- XX/* When nonzero, regmatch() prints each node it visits on stderr. */
- XXint regnarrate = 0;
- XXvoid regdump();
- XXSTATIC char *regprop();
- XX#endif
-
- XX/*
- XX - regexec - match a regexp against a string
- XX *
- XX * Returns 1 if prog matches somewhere in string, 0 if it does not.  A
- XX * NULL argument or a program whose first byte is not MAGIC is reported
- XX * through regerror() and also yields 0.  On success regtry() has filled
- XX * in prog->startp[] and prog->endp[].
- XX */
- XXint
- XXregexec(prog, string)
- XXregister regexp *prog;
- XXregister char *string;
- XX{
- XX register char *s;
- XX extern char *strchr();
-
- XX /* Be paranoid... */
- XX if (prog == NULL || string == NULL) {
- XX regerror("NULL parameter");
- XX return(0);
- XX }
-
- XX /* Check validity of program. */
- XX if (UCHARAT(prog->program) != MAGIC) {
- XX regerror("corrupted program");
- XX return(0);
- XX }
-
- XX /* If there is a "must appear" string, look for it. */
- XX /* Cheap rejection: without regmust in the input, no match is attempted. */
- XX if (prog->regmust != NULL) {
- XX s = string;
- XX while ((s = strchr(s, prog->regmust[0])) != NULL) {
- XX if (strncmp(s, prog->regmust, prog->regmlen) == 0)
- XX break; /* Found it. */
- XX s++;
- XX }
- XX if (s == NULL) /* Not present. */
- XX return(0);
- XX }
-
- XX /* Mark beginning of line for ^ . */
- XX regbol = string;
-
- XX /* Simplest case: anchored match need be tried only once. */
- XX if (prog->reganch)
- XX return(regtry(prog, string));
-
- XX /* Messy cases: unanchored match. */
- XX s = string;
- XX if (prog->regstart != '\0')
- XX /* We know what char it must start with. */
- XX while ((s = strchr(s, prog->regstart)) != NULL) {
- XX if (regtry(prog, s))
- XX return(1);
- XX s++;
- XX }
- XX else
- XX /* We don't -- general case. */
- XX /* The do-while also tries the position of the terminating NUL. */
- XX do {
- XX if (regtry(prog, s))
- XX return(1);
- XX } while (*s++ != '\0');
-
- XX /* Failure. */
- XX return(0);
- XX}
-
- XX/*
- XX - regtry - attempt a match beginning exactly at string
- XX *
- XX * Points the matcher's work variables at this program and position,
- XX * clears every startp/endp slot, and runs regmatch() on the node after
- XX * the magic byte.  On success slot 0 records the whole match.
- XX */
- XXstatic int /* 0 failure, 1 success */
- XXregtry(prog, string)
- XXregexp *prog;
- XXchar *string;
- XX{
- XX    register int slot;
-
- XX    reginput = string;
- XX    regstartp = prog->startp;
- XX    regendp = prog->endp;
-
- XX    /* Forget whatever an earlier attempt recorded. */
- XX    for (slot = 0; slot < NSUBEXP; slot++) {
- XX        prog->startp[slot] = NULL;
- XX        prog->endp[slot] = NULL;
- XX    }
-
- XX    if (!regmatch(prog->program + 1))
- XX        return(0);
-
- XX    /* reginput was left just past the matched text. */
- XX    prog->startp[0] = string;
- XX    prog->endp[0] = reginput;
- XX    return(1);
- XX}
-
- XX/*
- XX - regmatch - main matching routine
- XX *
- XX * Conceptually the strategy is simple: check to see whether the current
- XX * node matches, call self recursively to see whether the rest matches,
- XX * and then act accordingly. In practice we make some effort to avoid
- XX * recursion, in particular by going through "ordinary" nodes (that don't
- XX * need to know whether the rest of the match failed) by a loop instead of
- XX * by recursion.
- XX *
- XX * prog is the node to start from.  Input is consumed through the global
- XX * reginput; subexpression boundaries are recorded through regstartp and
- XX * regendp.
- XX */
- XXstatic int /* 0 failure, 1 success */
- XXregmatch(prog)
- XXchar *prog;
- XX{
- XX register char *scan; /* Current node. */
- XX char *next; /* Next node. */
- XX extern char *strchr();
-
- XX scan = prog;
- XX#ifdef DEBUG
- XX if (scan != NULL && regnarrate)
- XX fprintf(stderr, "%s(\n", regprop(scan));
- XX#endif
- XX while (scan != NULL) {
- XX#ifdef DEBUG
- XX if (regnarrate)
- XX fprintf(stderr, "%s...\n", regprop(scan));
- XX#endif
- XX next = regnext(scan);
-
- XX switch (OP(scan)) {
- XX case BOL:
- XX if (reginput != regbol)
- XX return(0);
- XX break;
- XX case EOL:
- XX if (*reginput != '\0')
- XX return(0);
- XX break;
- XX case ANY:
- XX if (*reginput == '\0')
- XX return(0);
- XX reginput++;
- XX break;
- XX case EXACTLY: {
- XX register int len;
- XX register char *opnd;
-
- XX opnd = OPERAND(scan);
- XX /* Inline the first character, for speed. */
- XX if (*opnd != *reginput)
- XX return(0);
- XX len = strlen(opnd);
- XX if (len > 1 && strncmp(opnd, reginput, len) != 0)
- XX return(0);
- XX reginput += len;
- XX }
- XX break;
- XX case ANYOF:
- XX if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
- XX return(0);
- XX reginput++;
- XX break;
- XX case ANYBUT:
- XX if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
- XX return(0);
- XX reginput++;
- XX break;
- XX case NOTHING:
- XX break;
- XX case BACK:
- XX break;
- XX case OPEN+1:
- XX case OPEN+2:
- XX case OPEN+3:
- XX case OPEN+4:
- XX case OPEN+5:
- XX case OPEN+6:
- XX case OPEN+7:
- XX case OPEN+8:
- XX case OPEN+9: {
- XX register int no;
- XX register char *save;
-
- XX no = OP(scan) - OPEN;
- XX save = reginput;
-
- XX /* The start is recorded only once the rest of the pattern has matched. */
- XX if (regmatch(next)) {
- XX /*
- XX * Don't set startp if some later
- XX * invocation of the same parentheses
- XX * already has.
- XX */
- XX if (regstartp[no] == NULL)
- XX regstartp[no] = save;
- XX return(1);
- XX } else
- XX return(0);
- XX }
- XX break;
- XX case CLOSE+1:
- XX case CLOSE+2:
- XX case CLOSE+3:
- XX case CLOSE+4:
- XX case CLOSE+5:
- XX case CLOSE+6:
- XX case CLOSE+7:
- XX case CLOSE+8:
- XX case CLOSE+9: {
- XX register int no;
- XX register char *save;
-
- XX no = OP(scan) - CLOSE;
- XX save = reginput;
-
- XX if (regmatch(next)) {
- XX /*
- XX * Don't set endp if some later
- XX * invocation of the same parentheses
- XX * already has.
- XX */
- XX if (regendp[no] == NULL)
- XX regendp[no] = save;
- XX return(1);
- XX } else
- XX return(0);
- XX }
- XX break;
- XX case BRANCH: {
- XX register char *save;
-
- XX if (OP(next) != BRANCH) /* No choice. */
- XX next = OPERAND(scan); /* Avoid recursion. */
- XX else {
- XX /* Try each alternative in turn, restoring the input position after a failure. */
- XX do {
- XX save = reginput;
- XX if (regmatch(OPERAND(scan)))
- XX return(1);
- XX reginput = save;
- XX scan = regnext(scan);
- XX } while (scan != NULL && OP(scan) == BRANCH);
- XX return(0);
- XX /* NOTREACHED */
- XX }
- XX }
- XX break;
- XX case STAR:
- XX case PLUS: {
- XX register char nextch;
- XX register int no;
- XX register char *save;
- XX register int min;
-
- XX /*
- XX * Lookahead to avoid useless match attempts
- XX * when we know what character comes next.
- XX */
- XX nextch = '\0';
- XX if (OP(next) == EXACTLY)
- XX nextch = *OPERAND(next);
- XX min = (OP(scan) == STAR) ? 0 : 1;
- XX save = reginput;
- XX /* Take as many repetitions as possible, then give them back one at a time. */
- XX no = regrepeat(OPERAND(scan));
- XX while (no >= min) {
- XX /* If it could work, try it. */
- XX if (nextch == '\0' || *reginput == nextch)
- XX if (regmatch(next))
- XX return(1);
- XX /* Couldn't or didn't -- back up. */
- XX no--;
- XX reginput = save + no;
- XX }
- XX return(0);
- XX }
- XX break;
- XX case END:
- XX return(1); /* Success! */
- XX break;
- XX default:
- XX regerror("memory corruption");
- XX return(0);
- XX break;
- XX }
-
- XX scan = next;
- XX }
-
- XX /*
- XX * We get here only if there's trouble -- normally "case END" is
- XX * the terminating point.
- XX */
- XX regerror("corrupted pointers");
- XX return(0);
- XX}
-
- XX/*
- XX - regrepeat - match a simple node as many times as possible
- XX *
- XX * Starting at reginput, counts consecutive matches of the node p (ANY,
- XX * EXACTLY, ANYOF or ANYBUT), leaves reginput just past them and returns
- XX * the count.  Any other opcode is reported and counts as zero matches.
- XX */
- XXstatic int
- XXregrepeat(p)
- XXchar *p;
- XX{
- XX    register int matched = 0;
- XX    register char *cur = reginput;
- XX    register char *operand = OPERAND(p);
-
- XX    switch (OP(p)) {
- XX    case ANY:
- XX        /* '.' swallows everything up to the end of the string. */
- XX        matched = strlen(cur);
- XX        cur += matched;
- XX        break;
- XX    case EXACTLY:
- XX        /* Only the operand's first character is compared. */
- XX        for (; *operand == *cur; cur++)
- XX            matched++;
- XX        break;
- XX    case ANYOF:
- XX        for (; *cur != '\0' && strchr(operand, *cur) != NULL; cur++)
- XX            matched++;
- XX        break;
- XX    case ANYBUT:
- XX        for (; *cur != '\0' && strchr(operand, *cur) == NULL; cur++)
- XX            matched++;
- XX        break;
- XX    default:                    /* Not a node this routine can repeat. */
- XX        regerror("internal foulup");
- XX        matched = 0;            /* Best compromise. */
- XX        break;
- XX    }
-
- XX    reginput = cur;
- XX    return(matched);
- XX}
-
- XX/*
- XX - regnext - dig the "next" pointer out of a node
- XX *
- XX * Returns the node that p's "next" offset refers to: p - offset for a
- XX * BACK node, p + offset for any other.  Returns NULL when the offset is
- XX * zero (end of chain) or when p is &regdummy (no code was emitted).
- XX */
- XXstatic char *
- XXregnext(p)
- XXregister char *p;
- XX{
- XX register int offset;
-
- XX if (p == &regdummy)
- XX return(NULL);
-
- XX offset = NEXT(p);
- XX if (offset == 0)
- XX return(NULL);
-
- XX if (OP(p) == BACK)
- XX return(p-offset);
- XX else
- XX return(p+offset);
- XX}
-
- XX#ifdef DEBUG
-
- XXSTATIC char *regprop();
-
- XX/*
- XX - regdump - dump a regexp onto stdout in vaguely comprehensible form
- XX *
- XX * Prints one line per node -- its offset within r->program, its name
- XX * from regprop(), the offset of its "next" node (0 if none) and any
- XX * literal operand -- up to and including the END node, followed by the
- XX * header fields that are set.
- XX */
- XXvoid
- XXregdump(r)
- XXregexp *r;
- XX{
- XX register char *s;
- XX register char op = EXACTLY; /* Arbitrary non-END op. */
- XX register char *next;
- XX extern char *strchr();
-
-
- XX s = r->program + 1;
- XX while (op != END) { /* While that wasn't END last time... */
- XX op = OP(s);
- XX /* Pointer differences are not int; cast so they agree with %d. */
- XX printf("%2d%s", (int)(s-r->program), regprop(s)); /* Where, what. */
- XX next = regnext(s);
- XX if (next == NULL) /* Next ptr. */
- XX printf("(0)");
- XX else
- XX printf("(%d)", (int)((s-r->program)+(next-s)));
- XX s += 3;
- XX if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
- XX /* Literal string, where present. */
- XX while (*s != '\0') {
- XX putchar(*s);
- XX s++;
- XX }
- XX s++;
- XX }
- XX putchar('\n');
- XX }
-
- XX /* Header fields of interest. */
- XX if (r->regstart != '\0')
- XX printf("start `%c' ", r->regstart);
- XX if (r->reganch)
- XX printf("anchored ");
- XX if (r->regmust != NULL)
- XX printf("must have \"%s\"", r->regmust);
- XX printf("\n");
- XX}
-
- XX/*
- XX - regprop - printable representation of opcode
- XX *
- XX * Returns a pointer to a static buffer holding ":" followed by the name
- XX * of the opcode of node op (OPEN and CLOSE carry their number).  The
- XX * buffer is overwritten by the next call.  An unknown opcode is
- XX * reported through regerror() and yields just ":".
- XX */
- XXstatic char *
- XXregprop(op)
- XXchar *op;
- XX{
- XX register char *p;
- XX static char buf[50];
-
- XX (void) strcpy(buf, ":");
- XX /* Start from "no name" so the default case never reads p unset. */
- XX p = NULL;
-
- XX switch (OP(op)) {
- XX case BOL:
- XX p = "BOL";
- XX break;
- XX case EOL:
- XX p = "EOL";
- XX break;
- XX case ANY:
- XX p = "ANY";
- XX break;
- XX case ANYOF:
- XX p = "ANYOF";
- XX break;
- XX case ANYBUT:
- XX p = "ANYBUT";
- XX break;
- XX case BRANCH:
- XX p = "BRANCH";
- XX break;
- XX case EXACTLY:
- XX p = "EXACTLY";
- XX break;
- XX case NOTHING:
- XX p = "NOTHING";
- XX break;
- XX case BACK:
- XX p = "BACK";
- XX break;
- XX case END:
- XX p = "END";
- XX break;
- XX case OPEN+1:
- XX case OPEN+2:
- XX case OPEN+3:
- XX case OPEN+4:
- XX case OPEN+5:
- XX case OPEN+6:
- XX case OPEN+7:
- XX case OPEN+8:
- XX case OPEN+9:
- XX /* Name and number are formatted straight into buf. */
- XX sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
- XX p = NULL;
- XX break;
- XX case CLOSE+1:
- XX case CLOSE+2:
- XX case CLOSE+3:
- XX case CLOSE+4:
- XX case CLOSE+5:
- XX case CLOSE+6:
- XX case CLOSE+7:
- XX case CLOSE+8:
- XX case CLOSE+9:
- XX sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
- XX p = NULL;
- XX break;
- XX case STAR:
- XX p = "STAR";
- XX break;
- XX case PLUS:
- XX p = "PLUS";
- XX break;
- XX default:
- XX regerror("corrupted opcode");
- XX break;
- XX }
- XX if (p != NULL)
- XX (void) strcat(buf, p);
- XX return(buf);
- XX}
- XX#endif
-
- XX/*
- XX * The following is provided for those people who do not have strcspn() in
- XX * their C libraries. They should get off their butts and do something
- XX * about it; at least one public-domain implementation of those (highly
- XX * useful) string routines has been published on Usenet.
- XX */
- XX#ifdef STRCSPN
- XX/*
- XX * strcspn - find length of initial segment of s1 consisting entirely
- XX * of characters not from s2
- XX *
- XX * s1 is the string to scan and s2 the set of stop characters.  Returns
- XX * the number of leading characters of s1 that do not occur in s2, which
- XX * is the whole length of s1 when none of them occurs.
- XX */
-
- XXstatic int
- XXstrcspn(s1, s2)
- XXchar *s1;
- XXchar *s2;
- XX{
- XX register char *scan1;
- XX register char *scan2;
- XX register int count;
-
- XX count = 0;
- XX for (scan1 = s1; *scan1 != '\0'; scan1++) {
- XX /* Compare the current character against every stop character. */
- XX for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. */
- XX if (*scan1 == *scan2++)
- XX return(count);
- XX count++;
- XX }
- XX return(count);
- XX}
- XX#endif
- @//E*O*F regexp.c//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - regexp.h
- # Unpack under the real name unless that would overwrite an existing
- # file; then divert to a name prefixed with this shell's process id.
- OUT=regexp.h
- if test -f "$OUT" ; then
-     echo regexp.h exists, putting output in $$regexp.h
-     OUT=$$regexp.h
-     STATUS=1
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F regexp.h//'
- XX/*
- XX * Definitions etc. for regexp(3) routines.
- XX *
- XX * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
- XX * not the System V one.
- XX */
- XX/* Number of slots in each of the startp and endp arrays. */
- XX#define NSUBEXP 10
- XXtypedef struct regexp {
- XX /*
- XX  * After a successful match, slot 0 holds the start and end of the
- XX  * whole match; slots 1-9 are set for OPEN+n / CLOSE+n nodes.  All
- XX  * slots are reset to NULL before every match attempt.
- XX  */
- XX char *startp[NSUBEXP];
- XX char *endp[NSUBEXP];
- XX /* If not '\0', regexec() tries matches only where this character occurs. */
- XX char regstart; /* Internal use only. */
- XX /* If nonzero, regexec() tries a match only at the start of the string. */
- XX char reganch; /* Internal use only. */
- XX /* If not NULL, regexec() fails at once unless the input contains this string. */
- XX char *regmust; /* Internal use only. */
- XX /* Number of characters of regmust that regexec() compares. */
- XX int regmlen; /* Internal use only. */
- XX /* The first byte is checked against MAGIC; the first node starts at program + 1. */
- XX char program[1]; /* Unwarranted chumminess with compiler. */
- XX} regexp;
-
- XXextern regexp *regcomp();
- XXextern int regexec();
- XXextern void regsub();
- XXextern void regerror();
- @//E*O*F regexp.h//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - regmagic.h
- # Unpack under the real name unless that would overwrite an existing
- # file; then divert to a name prefixed with this shell's process id.
- OUT=regmagic.h
- if test -f "$OUT" ; then
-     echo regmagic.h exists, putting output in $$regmagic.h
-     OUT=$$regmagic.h
-     STATUS=1
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F regmagic.h//'
- XX/*
- XX * The first byte of the regexp internal "program" is actually this magic
- XX * number; the start node begins in the second byte.
- XX */
- XX#define MAGIC 0234
- @//E*O*F regmagic.h//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - rv_change.c
- # Unpack under the real name unless that would overwrite an existing
- # file; then divert to a name prefixed with this shell's process id.
- OUT=rv_change.c
- if test -f "$OUT" ; then
-     echo rv_change.c exists, putting output in $$rv_change.c
-     OUT=$$rv_change.c
-     STATUS=1
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F rv_change.c//'
- XX#include "rv.h"
-
- XXvoid
- XXchange()
- XX/*
- XX * Change - change text
- XX *
- XX * Replaces the region described by the global screen (first/last line
- XX * and, when sc_validcol is set, first/last column) and then enters
- XX * insert mode through insert().  Changing columns across several lines
- XX * is not supported: that request is refused with a message and, since
- XX * nothing was altered, the file is not marked as modified.
- XX */
- XX{
- XX register struct sc_screen *sc;
-
- XX sc = &screen;
-
- XX /*
- XX * Three cases: lines, columns, or both
- XX */
- XX if (sc->sc_validcol) { /* If columns */
- XX if (sc->sc_firstline != sc->sc_lastline) { /* If both */
- XX botprint(TRUE,
- XX "Cant change columns within multiple lines yet.\n");
- XX return;
- XX }
- XX file.fi_modified = TRUE;
- XX sc->sc_column = sc->sc_firstcol;
- XX insert();
- XX }
- XX else { /* If lines */
- XX file.fi_modified = TRUE;
- XX if (sc->sc_firstline == sc->sc_lastline) {
- XX /*
- XX * Simple case - change 1 line
- XX */
- XX sc->sc_column = 0;
- XX sc->sc_firstcol = 0;
- XX sc->sc_lastcol = sc->sc_curline->li_width-1;
- XX yank_cmd = ' ';
- XX if (sc->sc_lastcol >= 0) {
- XX undo.un_deleted = TRUE;
- XX yank(); /* Save for later undo */
- XX }
- XX /* Empty the line in place before text is inserted into it. */
- XX sc->sc_curline->li_width = 0;
- XX sc->sc_curline->li_segments = 1;
- XX sc->sc_curline->li_text[0] = '\0';
- XX sc->sc_lastcol = -1;
- XX insert();
- XX }
- XX else {
- XX /*
- XX * Change multiple lines
- XX */
- XX delete();
- XX sc->sc_column = 0;
- XX if (sc->sc_lineno == file.fi_numlines) /* If bottom */
- XX if (sc->sc_lineno != 1) /* If not top */
- XX openline(1);
- XX else { /* Single line in file, replace */
- XX sc->sc_firstcol = 0;
- XX sc->sc_lastcol = -1;
- XX }
- XX else
- XX openline(-1);
- XX botprint(FALSE, "%d lines changed",
- XX sc->sc_lastline - sc->sc_firstline + 1);
- XX insert();
- XX }
- XX }
- XX}
- @//E*O*F rv_change.c//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - rv_column.c
- # Unpack under the real name unless that would overwrite an existing
- # file; then divert to a name prefixed with this shell's process id.
- OUT=rv_column.c
- if test -f "$OUT" ; then
-     echo rv_column.c exists, putting output in $$rv_column.c
-     OUT=$$rv_column.c
-     STATUS=1
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F rv_column.c//'
- XX#include "rv.h"
-
- XX/*
- XX * Logical to/from physical column conversion functions
- XX */
-
- XXINT
- XXfile_column(s, maxscreen_col)
- XX/*
- XX * Map a screen column onto the file column (character index within
- XX * the line text s) displayed there.  Returns 0 for a NULL line after
- XX * flagging the error.
- XX */
- XXregister char *s;
- XXregister INT maxscreen_col;
- XX{
- XX    register INT ch;
- XX    register INT fcol = 0;      /* Characters consumed so far. */
- XX    register INT scol = 0;      /* Screen cells they occupy. */
-
- XX    if (s == NULL) {
- XX        errflag = 1;
- XX        botprint(TRUE, "file_column - text is null.\n");
- XX        return 0;
- XX    }
-
- XX    for (;;) {
- XX        ch = *s++;
- XX        if (!ch)
- XX            break;
- XX        if (ch < ' ' || ch > '~') {     /* control character */
- XX            if (ch == '\t' && !set_list) {
- XX                /* Advance to one cell short of the next tab stop. */
- XX                scol += set_tabstops -
- XX                    (scol % set_tabstops) - 1;
- XX            } else {
- XX                /* Other control characters take one extra cell. */
- XX                ++scol;
- XX            }
- XX        }
- XX        ++fcol;
- XX        ++scol;
- XX        if (scol > maxscreen_col)
- XX            break;
- XX    }
-
- XX    if (fcol <= 0)
- XX        return 0;
- XX    return fcol-1;
- XX}
-
-
-
- XXINT
- XXscreen_column(s, maxfile_col)
- XX/*
- XX * Map a file column (character index within the line text s) onto
- XX * the screen column where it is displayed.  Returns 0 for a NULL line
- XX * after flagging the error.
- XX */
- XXregister char *s;
- XXregister INT maxfile_col;
- XX{
- XX    register INT ch;
- XX    register INT fcol = 0;      /* Characters consumed so far. */
- XX    register INT scol = 0;      /* Screen cells they occupy. */
-
- XX    if (s == NULL) {
- XX        errflag = 1;
- XX        botprint(TRUE, "screen_column - text is null.\n");
- XX        return 0;
- XX    }
-
- XX    for (;;) {
- XX        ch = *s++;
- XX        if (!ch)
- XX            break;
- XX        if (ch < ' ' || ch > '~') {     /* control character */
- XX            if (ch == '\t' && !set_list) {
- XX                /* In input mode a tab at the target column counts as one cell. */
- XX                if (input_mode && fcol >= maxfile_col) {
- XX                    ++scol;
- XX                    break;
- XX                }
- XX                /* Advance to one cell short of the next tab stop. */
- XX                scol += set_tabstops -
- XX                    (scol % set_tabstops) - 1;
- XX            } else {
- XX                /* Other control characters take one extra cell. */
- XX                ++scol;
- XX            }
- XX        }
- XX        ++fcol;
- XX        ++scol;
- XX        if (fcol > maxfile_col)
- XX            break;
- XX    }
-
- XX    if (scol <= 0)
- XX        return 0;
- XX    return scol-1;
- XX}
- @//E*O*F rv_column.c//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - rv_delcol.c
- # Unpack under the real name unless that would overwrite an existing
- # file; then divert to a name prefixed with this shell's process id.
- OUT=rv_delcol.c
- if test -f "$OUT" ; then
-     echo rv_delcol.c exists, putting output in $$rv_delcol.c
-     OUT=$$rv_delcol.c
-     STATUS=1
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F rv_delcol.c//'
- XX#include "rv.h"
-
- XXvoid delete_columns(first, last)
- XX/*
- XX * Remove columns first..last (inclusive) from the current line,
- XX * redraw the line and park the cursor at the start of the gap.
- XX */
- XXINT first,last;
- XX{
- XX    register struct sc_screen *scr = &screen;
- XX    register struct li_line *cur;
- XX    register char *dst, *src;
-
- XX    save_Undo();
- XX    cur = scr->sc_curline;
-
- XX    /*
- XX     * Slide the tail of the line, terminating NUL included, down
- XX     * over the deleted columns.
- XX     */
- XX    dst = cur->li_text + first;
- XX    src = cur->li_text + last + 1;
- XX    for (;;) {
- XX        if ((*dst++ = *src++) == '\0')
- XX            break;
- XX    }
-
- XX    /*
- XX     * Repaint the shortened line.
- XX     */
- XX    redraw_curline(cur->li_text);
-
- XX    /*
- XX     * Keep the cursor on the line: clamp it to the last column.
- XX     */
- XX    scr->sc_column = first;
- XX    if (cur->li_width <= scr->sc_column)
- XX        scr->sc_column = cur->li_width-1;
- XX    move_cursor(scr->sc_lineno, scr->sc_column);
- XX}
- @//E*O*F rv_delcol.c//
- chmod u=rw,g=rw,o=rw $OUT
-
- echo x - rv_dot.c
- # Unpack under the real name unless that would overwrite an existing
- # file; then divert to a name prefixed with this shell's process id.
- OUT=rv_dot.c
- if test -f "$OUT" ; then
-     echo rv_dot.c exists, putting output in $$rv_dot.c
-     OUT=$$rv_dot.c
-     STATUS=1
- fi
- sed 's/^XX//' > $OUT <<'@//E*O*F rv_dot.c//'
- XX#include "rv.h"
-
- XXvoid
- XXrv_dot()
- XX/*
- XX * repeat last change
- XX *
- XX * Driven by the undo record: if text was inserted last time it is
- XX * yanked again from where it was put; if text was deleted, a region
- XX * of the same size is deleted at the cursor; finally any inserted
- XX * text is put at the cursor.  Flashes and sets errflag when there is
- XX * nothing to repeat.
- XX */
- XX{
- XX register struct sc_screen *sc;
- XX register struct ya_yank *yk;
- XX INT direction = -1;
- XX INT saveline, savecol;
-
- XX sc = &screen;
- XX yk = &yank_array[0];
-
- XX /*
- XX * See if there is something to repeat
- XX */
- XX if (undo.un_deleted == FALSE && undo.un_inserted == FALSE) {
- XX flash();
- XX errflag = 1;
- XX return;
- XX }
-
- XX /*
- XX * Put last inserted text
- XX */
- XX if (undo.un_inserted) {
- XX /* Remember the cursor; it is moved for the yank and restored below. */
- XX saveline = sc->sc_lineno;
- XX savecol = sc->sc_column;
- XX /*
- XX * Yank text from old location
- XX */
- XX move_abs_cursor(undo.un_firstline, 0);
- XX sc->sc_firstline = undo.un_firstline;
- XX sc->sc_lastline = undo.un_lastline;
- XX if (undo.un_validcol == FALSE)
- XX sc->sc_validcol = FALSE;
- XX else {
- XX sc->sc_validcol = TRUE;
- XX sc->sc_firstcol = undo.un_firstcol;
- XX sc->sc_lastcol = undo.un_lastcol;
- XX }
- XX yank_cmd = '.';
- XX yank();
- XX move_abs_cursor(saveline, savecol);
- XX }
-
- XX /*
- XX * Repeat last deletion
- XX */
- XX if (undo.un_deleted && yk->ya_type != YANK_EMPTY) {
- XX /* Build a region at the cursor with the size recorded in yank_array[0]. */
- XX sc->sc_firstline = sc->sc_lineno;
- XX if (yk->ya_type != YANK_COLS) {
- XX sc->sc_validcol = FALSE;
- XX sc->sc_lastline = sc->sc_firstline + yk->ya_numlines-1;
- XX }
- XX else {
- XX sc->sc_validcol = TRUE;
- XX sc->sc_lastline = sc->sc_firstline;
- XX sc->sc_firstcol = sc->sc_column;
- XX sc->sc_lastcol = sc->sc_firstcol + yk->ya_width - 1;
- XX /* Clip the region to the end of the current line. */
- XX if (sc->sc_lastcol >= sc->sc_curline->li_width)
- XX sc->sc_lastcol = sc->sc_curline->li_width-1;
- XX if (sc->sc_lastcol < 0) {
- XX flash();
- XX errflag = 1;
- XX return;
- XX }
- XX }
- XX yank_cmd = ' ';
- XX delete();
- XX }
-
- XX /*
- XX * Repeat last insertion
- XX */
- XX if (undo.un_inserted) {
- XX yank_cmd = '.';
- XX if (undo.un_validcol == TRUE)
- XX /*
- XX * Dot inserts changes before the cursor
- XX */
- XX sc->sc_column--;
- XX put(direction);
- XX }
- XX}
- @//E*O*F rv_dot.c//
- chmod u=rw,g=rw,o=rw $OUT
-
- # Verify the unpacked files against the line/word/character counts that
- # were recorded when the archive was wrapped.  STATUS becomes 1 if the
- # counts differ; it is already 1 if any file was diverted to avoid an
- # overwrite.
- echo Inspecting for damage in transit...
- temp=/tmp/sharin$$; dtemp=/tmp/sharout$$
- trap "rm -f $temp $dtemp; exit" 0 1 2 3 15
- cat > $temp <<\!!!
- 40 189 1543 BUGFIX
- 26 96 799 BUGFIX2
- 39 128 943 Makefile.bsd
- 38 121 910 Makefile.usg
- 60 314 2087 NEXT_REL
- 103 430 2818 README
- 66 285 1842 binsearch.c
- 20 41 231 copy.c
- 24 168 1052 copyright
- 57 57 593 Manifest
- 8 11 73 regerror.c
- 1237 4440 28158 regexp.c
- 21 86 574 regexp.h
- 5 28 153 regmagic.h
- 68 197 1435 rv_change.c
- 81 235 1483 rv_column.c
- 34 77 604 rv_delcol.c
- 88 217 1726 rv_dot.c
- 2015 7120 47024 total
- !!!
- # Name the files explicitly, in the order of the table above.  No
- # assignment to FILES is visible in this script, and with an empty
- # variable wc would read standard input instead of the unpacked files.
- wc BUGFIX BUGFIX2 Makefile.bsd Makefile.usg NEXT_REL README \
-     binsearch.c copy.c copyright Manifest regerror.c regexp.c regexp.h \
-     regmagic.h rv_change.c rv_column.c rv_delcol.c rv_dot.c \
-     | sed 's=[^ ]*/==' | diff -b $temp - >$dtemp
- if test -s $dtemp ; then
-     echo "Ouch [diff of wc output]:"
-     cat $dtemp
-     STATUS=1
- elif test $STATUS = 0 ; then
-     echo "No problems found."
- else
-     echo "WARNING -- PROBLEMS WERE FOUND..."
- fi
- exit $STATUS
-